Importing data
designMatrix <- ReadDataFrameFromTsv("../../design/all_samples_short_names.tsv.csv")
## ../../design/all_samples_short_names.tsv.csv read from disk!
exonmatrix <- ReadDataFrameFromTsv(file.name.path="../../data/NEW_PFC_exonMatrix.txt",
row.names.col=NULL)
## ../../data/NEW_PFC_exonMatrix.txt read from disk!
counts <- exonmatrix[,c(7:dim(exonmatrix)[2])]
exonDesignMatrix <- cbind(colnames(counts), designMatrix)
exonDesignMatrix$samples <- rownames(exonDesignMatrix)
exonDesignMatrix$exonsamples <- colnames(counts)
rownames(exonDesignMatrix) <- exonDesignMatrix$exonsamples
exonDesignMatrix <- exonDesignMatrix[-c(grep("RS2",exonDesignMatrix$condition)),]
exonDesignMatrix<- exonDesignMatrix[-c(grep("HC7",exonDesignMatrix$condition)),]
exonmatrix <- exonmatrix[, c(1:6, which(colnames(exonmatrix) %in% rownames(exonDesignMatrix)))]
colnames(exonmatrix)
## [1] "GeneID" "Chr" "Start" "End" "Strand" "Length" "S3HC5_1"
## [8] "S3HC5_2" "S3HC5_3" "S3HC5_4" "S3HC5_6" "S3SD5_1" "S3SD5_2" "S3SD5_3"
## [15] "S3SD5_4" "S3SD5_6" "WTHC5_1" "WTHC5_2" "WTHC5_3" "WTHC5_4" "WTHC5_6"
## [22] "WTSD5_1" "WTSD5_2" "WTSD5_3" "WTSD5_4" "WTSD5_6"
annot <- exonmatrix[,c(1:6)]
counts <- exonmatrix[,c(7:dim(exonmatrix)[2])]
y.all <- DGEList(counts=counts, genes=annot)
y.all <- y.all[, colnames(counts)]
summary(is.na(y.all$genes))
## GeneID Chr Start End
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:222996 FALSE:222996 FALSE:222996 FALSE:222996
## Strand Length
## Mode :logical Mode :logical
## FALSE:222996 FALSE:222996
dge <- y.all
loading annotation file
## annotation downloaded from ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO/Mammalia/
ncbi.annotation <- read.delim("../../data/annotation/Mus_musculus.gene_info",
comment.char = "%", header=TRUE, stringsAsFactors=FALSE)
map <- match(dge$genes$GeneID, ncbi.annotation$GeneID)
# map <- map[!is.na(map)]
# print("ERROR: NA ON CHROMOSOME")
# dge$genes$newChr <- ncbi.annotation$chromosome[map]
dge$genes$Chr <- gsub(pattern="chr", replacement="", x=dge$genes$Chr)
dge$genes$Symbol <- ncbi.annotation$Symbol[map]
dge$genes$Strand <- NULL
# dge$genes
dge.annot <- dge
summary(is.na(dge.annot$genes))
## GeneID Chr Start End
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:222996 FALSE:222996 FALSE:222996 FALSE:222996
##
## Length Symbol
## Mode :logical Mode :logical
## FALSE:222996 FALSE:220491
## TRUE :2505
filtering data
library(dplyr)
library(tidyr)
long_data <- gather(exonmatrix, key=condition, value=expression,
rownames(exonDesignMatrix))
long_data %>%
group_by(GeneID, condition) %>%
summarize(total = sum(expression)) %>%
group_by(GeneID) %>%
summarize(pass = sum(total > 20) >= 5) %>%
filter(pass) %>%
pull(GeneID) -> keep_ids
keep <- which(exonmatrix$GeneID %in% keep_ids)
# length(keep)
# dim(exonmatrix)
dge.annot <- dge.annot[keep, , keep.lib.sizes=FALSE]
Normalizations
TMM
dge.norm <- calcNormFactors(dge.annot)
dge.counts <- estimateCommonDisp(dge.norm)
PlotPCAPlotlyFunction(counts.data.frame=dge.counts$pseudo.counts,
design.matrix=exonDesignMatrix, shapeColname="condition",
colorColname="genotype", plotly.flag=TRUE, show.plot.flag=TRUE)
## [1] TRUE
loading negative controls
## negative controls
library(readxl)
sd.ctrls <- read_excel(path="../../data/controls/Additional File 4 full list of BMC genomics SD&RS2.xlsx", sheet=1)
sd.ctrls <- sd.ctrls[order(sd.ctrls$adj.P.Val),]
sd.neg.ctrls <- sd.ctrls[sd.ctrls$adj.P.Val > 0.7, ]
sd.neg.ctrls <- sd.neg.ctrls$`MGI Symbol`
sd.neg.ctrls <- sd.neg.ctrls[-which(is.na(sd.neg.ctrls))]
tab <- table(dge.counts$genes$Symbol)
single <- names(which(tab==1))
negcon <- which(dge.counts$genes$Symbol %in% intersect(sd.neg.ctrls, single))
# length(negcon)
exonDesignMatrix$gcondition <- droplevels(exonDesignMatrix$gcondition)
ruving
library(RUVSeq)
groups <- makeGroups(exonDesignMatrix$gcondition)
ruvedSExprData <- RUVs(as.matrix(round(dge.counts$pseudo.counts)), cIdx=negcon,
scIdx=groups, k=5)
normExprData <- ruvedSExprData$normalizedCounts
PlotPCAPlotlyFunction(counts.data.frame=normExprData,
design.matrix=exonDesignMatrix, shapeColname="condition",
colorColname="genotype", plotly.flag=TRUE, show.plot.flag=TRUE,
prefix.plot="TMM+RUVs")
## [1] TRUE
plotRLE(normExprData, outline=FALSE)

edgering with ruv
design <- model.matrix(~0+exonDesignMatrix$gcondition + ruvedSExprData$W)
colnames(design) <- c(levels(exonDesignMatrix$gcondition), paste0("W", 1:5))
dge.disp.des <- estimateDisp(dge.norm, design, robust=TRUE)
# dge.disp.des$pseudo.counts
# dge.counts$common.dispersion
# plotBCV(dge.disp.des)
fit <- glmFit(dge.disp.des, design)
KOSD5 - WTSD5
Differential expression
contr <- limma::makeContrasts(contrasts="KOSD5 - WTSD5", levels=design)
qlf <- glmLRT(fit, contrast=contr)
topTags(qlf, p.value=0.05, n=20)
## Coefficient: 1*KOSD5 -1*WTSD5
## GeneID Chr Start End Length Symbol logFC
## 72408 58234 15 89547626 89549882 2257 Shank3 -8.7534513
## 72409 58234 15 89557735 89560261 2527 Shank3 0.7104138
## 72404 58234 15 89533374 89533454 81 Shank3 1.0999900
## 72657 12805 15 92220447 92220479 33 Cntn1 1.8655127
## 72397 58234 15 89521103 89521373 271 Shank3 0.6915776
## 73190 634104 15 98220808 98221056 249 Olfr287 -6.4774186
## 72406 58234 15 89543100 89543230 131 Shank3 0.7775398
## 72402 58234 15 89532095 89532219 125 Shank3 0.6528003
## 73189 634104 15 98210004 98210123 120 Olfr287 -4.0643742
## 72389 58234 15 89500215 89500418 204 Shank3 0.7091774
## 72414 11434 15 89573832 89574585 754 Acr 1.1831918
## 72393 58234 15 89503540 89503707 168 Shank3 0.6926872
## 72654 12805 15 92128128 92128335 208 Cntn1 -6.0994072
## 73188 634104 15 98206971 98208377 1407 Olfr287 -3.2129539
## 72394 58234 15 89503797 89503913 117 Shank3 0.7546330
## 49819 100039826 13 12259813 12260273 461 <NA> 0.4544742
## 131933 73284 3 137625966 137628332 2367 Ddit4l 0.3714430
## 72392 58234 15 89503303 89503454 152 Shank3 0.6588258
## 72399 58234 15 89525160 89525273 114 Shank3 0.6481363
## 170987 68498 6 127949793 127953031 3239 Tspan11 0.5560605
## logCPM LR PValue FDR
## 72408 5.7438646 5912.87004 0.000000e+00 0.000000e+00
## 72409 7.2956758 173.38308 1.349924e-39 1.172004e-34
## 72404 2.7280258 130.33269 3.465448e-30 2.005802e-25
## 72657 0.0711896 70.37475 4.904355e-17 2.128981e-12
## 72397 3.3468886 63.03623 2.029395e-15 7.047684e-11
## 73190 -2.0498492 61.69349 4.013055e-15 1.161378e-10
## 72406 2.4094219 59.17273 1.444212e-14 3.582470e-10
## 72402 3.0971448 56.45919 5.737707e-14 1.245369e-09
## 73189 -1.8661725 52.51822 4.262748e-13 8.224263e-09
## 72389 2.5887533 49.98179 1.551797e-12 2.694541e-08
## 72414 1.1416585 49.43803 2.047383e-12 3.231886e-08
## 72393 2.4987683 47.97280 4.321734e-12 5.971279e-08
## 72654 -2.1793780 47.90642 4.470550e-12 5.971279e-08
## 73188 -0.1910068 41.63178 1.101873e-10 1.366637e-06
## 72394 1.7228002 39.25924 3.711082e-10 4.295948e-06
## 49819 3.6101228 33.71623 6.376655e-09 6.920264e-05
## 131933 4.8442894 33.58932 6.806534e-09 6.952274e-05
## 72392 1.9592242 33.41622 7.440104e-09 7.177220e-05
## 72399 1.7188706 28.52627 9.243592e-08 8.439784e-04
## 170987 2.2821088 28.42877 9.721013e-08 8.439784e-04
Alternative Splicing
sp <- diffSpliceDGE(fit, contrast=contr, geneid="GeneID", exonid="Start")
## Total number of exons: 173640
## Total number of genes: 15630
## Number of genes with 1 exon: 796
## Mean number of exons in a gene: 11
## Max number of exons in a gene: 314
topSpliceDGE(sp, test="Simes", FDR=0.05)
## GeneID Chr Symbol NExons P.Value FDR
## 72409 58234 15 Shank3 22 0.000000e+00 0.000000e+00
## 70774 27367 15 Rpl3 10 1.763053e-20 1.307657e-16
## 72678 12805 15 Cntn1 26 1.976317e-13 9.772230e-10
## 73447 22143 15 Tuba1b 4 8.382376e-09 3.108604e-05
exon plot
plotSpliceDGE(sp, geneid="Shank3", genecol="Symbol")

plotSpliceDGE(sp, geneid="Cntn1", genecol="Symbol")

plotSpliceDGE(sp, geneid="Rpl3", genecol="Symbol")

plotSpliceDGE(sp, geneid="Tuba1b", genecol="Symbol")

plotSpliceDGE(sp, geneid="Fabp7", genecol="Symbol")

plotSpliceDGE(sp, geneid="Hdac7", genecol="Symbol")

plotSpliceDGE(sp, geneid="Per3", genecol="Symbol")

plotSpliceDGE(sp, geneid="Kalrn", genecol="Symbol")

plotSpliceDGE(sp, geneid="Sgk1", genecol="Symbol")

plotSpliceDGE(sp, geneid="Dido1", genecol="Symbol")

edgering without ruv (QLfit)
design <- model.matrix(~0+exonDesignMatrix$gcondition)
colnames(design) <- levels(exonDesignMatrix$gcondition)
dge.disp.des <- estimateDisp(dge.norm, design, robust=TRUE)
fit <- glmFit(dge.disp.des, design)#glmQLFit(dge.disp.des, design)
KOSD5 - WTSD5
Differential expression (QLtest)
contr <- limma::makeContrasts(contrasts="KOSD5 - WTSD5", levels=design)
qlf <- glmLRT(fit, contrast=contr)#glmQLFTest(fit, contrast=contr)
topTags(qlf, n=20, p.value=0.05)
## Coefficient: 1*KOSD5 -1*WTSD5
## GeneID Chr Start End Length Symbol logFC
## 72408 58234 15 89547626 89549882 2257 Shank3 -8.8022894
## 72409 58234 15 89557735 89560261 2527 Shank3 0.7182385
## 73190 634104 15 98220808 98221056 249 Olfr287 -6.6280910
## 73188 634104 15 98206971 98208377 1407 Olfr287 -3.2926207
## 72654 12805 15 92128128 92128335 208 Cntn1 -6.2038428
## 72404 58234 15 89533374 89533454 81 Shank3 1.0362329
## 73189 634104 15 98210004 98210123 120 Olfr287 -4.2429463
## 72414 11434 15 89573832 89574585 754 Acr 1.2101703
## 72406 58234 15 89543100 89543230 131 Shank3 0.8154886
## 72657 12805 15 92220447 92220479 33 Cntn1 1.7057011
## 72402 58234 15 89532095 89532219 125 Shank3 0.6449115
## 73883 109901 15 100681143 100681288 146 Cela1 -4.0288217
## 72389 58234 15 89500215 89500418 204 Shank3 0.6897990
## 124624 26570 3 50364936 50372366 7431 Slc7a11 0.5835040
## logCPM LR PValue FDR
## 72408 5.74935846 2581.80484 0.000000e+00 0.000000e+00
## 72409 7.29565025 70.88405 3.788489e-17 3.289166e-12
## 73190 -2.04039759 59.77164 1.065272e-14 5.972940e-10
## 73188 -0.17532367 59.26803 1.375937e-14 5.972940e-10
## 72654 -2.18106438 53.62246 2.429664e-13 8.437737e-09
## 72404 2.73493738 51.50843 7.129051e-13 2.063147e-08
## 73189 -1.85552807 45.81991 1.296394e-11 3.215799e-07
## 72414 1.12615618 36.07217 1.901434e-09 4.127062e-05
## 72406 2.40620387 34.66236 3.921403e-09 7.565693e-05
## 72657 0.08011166 32.94057 9.501940e-09 1.649917e-04
## 72402 3.09506433 28.48043 9.465025e-08 1.494097e-03
## 73883 -2.50180153 24.24342 8.489681e-07 1.228457e-02
## 72389 2.59030798 23.97110 9.779280e-07 1.306211e-02
## 124624 5.80058770 21.32691 3.872572e-06 4.803096e-02
Alternative Splicing
sp1 <- diffSpliceDGE(fit, contrast=contr, geneid="GeneID", exonid="Start")
## Total number of exons: 173640
## Total number of genes: 15630
## Number of genes with 1 exon: 796
## Mean number of exons in a gene: 11
## Max number of exons in a gene: 314
topSpliceDGE(sp1, test="Simes", FDR=0.05)
## GeneID Chr Symbol NExons P.Value FDR
## 72409 58234 15 Shank3 22 0.000000e+00 0.000000e+00
## 70774 27367 15 Rpl3 10 8.409808e-22 6.237554e-18
## 72678 12805 15 Cntn1 26 1.275235e-12 6.305610e-09
## 73447 22143 15 Tuba1b 4 9.852492e-08 3.653797e-04
WriteDataFrameAsTsv(data.frame.to.save=topSpliceDGE(sp1, test="Simes", FDR=1,
number=Inf), file.name.path="KOSD5_WTSD5_diff_Splice_edger")
exon plot
plotSpliceDGE(sp1, geneid="Shank3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Cntn1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Rpl3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Tuba1b", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Fabp7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Hdac7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Per3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Kalrn", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Sgk1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Dido1", genecol="Symbol")

WTSD5 - WTHC5
Differential expression (QLtest)
contr <- limma::makeContrasts(contrasts="WTSD5 - WTHC5", levels=design)
qlf <- glmLRT(fit, contrast=contr)#glmQLFTest(fit, contrast=contr)
topTags(qlf, n=20, p.value=0.05)
## Coefficient: -1*WTHC5 1*WTSD5
## GeneID Chr Start End Length Symbol logFC
## 89251 57435 17 56103417 56105501 2085 Plin4 4.026792
## 89248 57435 17 56100591 56102330 1740 Plin4 4.033977
## 89252 57435 17 56106096 56107374 1279 Plin4 3.524487
## 219969 14605 X 140542846 140543185 340 Tsc22d3 2.069058
## 219967 14605 X 140539528 140541107 1580 Tsc22d3 1.362289
## 84835 14229 17 28399095 28401151 2057 Fkbp5 1.596825
## 95516 106878 18 60474191 60475585 1395 Smim3 1.780130
## 89250 57435 17 56103110 56103297 188 Plin4 4.226817
## 174514 53417 7 17030993 17035965 4973 Hif3a 1.681145
## 84836 14229 17 28402609 28402848 240 Fkbp5 1.709063
## 85148 74762 17 29827958 29832399 4442 Mdga1 1.306150
## 126668 100502895 3 88032269 88033152 884 Gm19439 2.455018
## 85004 12575 17 29099344 29100722 1379 Cdkn1a 1.390424
## 29246 19416 11 59963181 59964366 1186 Rasd1 1.732685
## 186521 20887 7 126672870 126673597 728 Sult1a1 1.666580
## 84839 14229 17 28412034 28412124 91 Fkbp5 2.019623
## 102447 226115 19 41063420 41063979 560 Opalin -1.444720
## 19644 12696 10 80170982 80171653 672 Cirbp -1.087081
## 96233 17202 18 66857705 66860472 2768 Mc4r 1.577204
## 84841 14229 17 28428352 28428466 115 Fkbp5 1.998569
## logCPM LR PValue FDR
## 89251 2.6443083 533.65001 4.538102e-118 7.879961e-113
## 89248 2.2442001 472.55906 8.889999e-105 7.718297e-100
## 89252 1.6642483 320.29747 1.247690e-71 7.221632e-67
## 219969 3.3500243 243.09207 8.327502e-55 3.614968e-50
## 219967 6.1582271 205.72646 1.175647e-46 4.082786e-42
## 84835 5.1146822 189.29529 4.530594e-43 1.311154e-38
## 95516 2.7624054 176.70778 2.536778e-40 6.292658e-36
## 89250 -0.4171852 166.70449 3.881324e-38 8.424413e-34
## 174514 3.2706614 155.59155 1.039736e-35 2.005997e-31
## 84836 1.9322828 144.93730 2.216506e-33 3.848741e-29
## 85148 4.3521796 143.75198 4.025468e-33 6.354385e-29
## 126668 0.8899875 141.03557 1.580388e-32 2.286822e-28
## 85004 4.7712953 133.60940 6.651463e-31 8.884308e-27
## 29246 2.2265890 131.18397 2.256899e-30 2.799199e-26
## 186521 3.1588994 121.08847 3.654654e-28 4.230627e-24
## 84839 1.1119482 107.57110 3.337166e-25 3.621660e-21
## 102447 2.6327847 105.18833 1.110618e-24 1.134398e-20
## 19644 4.7282365 103.29383 2.889517e-24 2.787421e-20
## 96233 2.0361005 101.09557 8.765078e-24 8.010358e-20
## 84841 0.6531217 97.98025 4.225759e-23 3.668804e-19
Alternative Splicing
sp1 <- diffSpliceDGE(fit, contrast=contr, geneid="GeneID", exonid="Start")
## Total number of exons: 173640
## Total number of genes: 15630
## Number of genes with 1 exon: 796
## Mean number of exons in a gene: 11
## Max number of exons in a gene: 314
topSpliceDGE(sp1, test="Simes", FDR=0.05)
## GeneID Chr Symbol NExons P.Value FDR
## 78179 545156 16 Kalrn 61 3.668716e-11 5.442173e-07
## 173085 100037396 7 D030047H15Rik 3 1.412498e-10 1.047650e-06
## 15321 20393 10 Sgk1 19 1.602018e-09 7.921447e-06
## 213678 19652 X Rbm3 8 2.211915e-08 6.269619e-05
## 114243 12064 2 Bdnf 5 2.532590e-08 6.269619e-05
## 150287 231148 5 Ablim2 23 2.535912e-08 6.269619e-05
## 60654 16709 14 Ktn1 42 1.111824e-07 2.356114e-04
## 122598 23856 2 Dido1 17 2.610955e-07 4.841363e-04
## 24849 74309 11 Osbp2 14 3.632241e-07 5.986740e-04
## 126979 319830 3 1500004A13Rik 2 1.342960e-06 1.992147e-03
WriteDataFrameAsTsv(data.frame.to.save=topSpliceDGE(sp1, test="Simes", FDR=1,
number=Inf), file.name.path="WTSD5_WTHC5_diff_Splice_edger")
exon plot
plotSpliceDGE(sp1, geneid="Shank3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Kalrn", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Pde4dip", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Pclo", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Fabp7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Hdac7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Per3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Osbp2", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Ablim2", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Ppp2r5c", genecol="Symbol")

plotSpliceDGE(sp1, geneid="D030047H15Rik", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Kalrn", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Sgk1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Dido1", genecol="Symbol")

KOSD5 - KOHC5
Differential expression (QLtest)
contr <- limma::makeContrasts(contrasts="KOSD5 - KOHC5", levels=design)
qlf <- glmLRT(fit, contrast=contr)#glmQLFTest(fit, contrast=contr)
topTags(qlf, n=20, p.value=0.05)
## Coefficient: -1*KOHC5 1*KOSD5
## GeneID Chr Start End Length Symbol logFC
## 89251 57435 17 56103417 56105501 2085 Plin4 4.154957
## 89248 57435 17 56100591 56102330 1740 Plin4 3.963197
## 89252 57435 17 56106096 56107374 1279 Plin4 3.703562
## 219969 14605 X 140542846 140543185 340 Tsc22d3 2.258351
## 84835 14229 17 28399095 28401151 2057 Fkbp5 1.822751
## 219967 14605 X 140539528 140541107 1580 Tsc22d3 1.458654
## 174514 53417 7 17030993 17035965 4973 Hif3a 1.929814
## 29246 19416 11 59963181 59964366 1186 Rasd1 2.110802
## 89250 57435 17 56103110 56103297 188 Plin4 4.189392
## 95516 106878 18 60474191 60475585 1395 Smim3 1.755609
## 126668 100502895 3 88032269 88033152 884 Gm19439 2.755095
## 84836 14229 17 28402609 28402848 240 Fkbp5 1.804273
## 186521 20887 7 126672870 126673597 728 Sult1a1 1.850060
## 96233 17202 18 66857705 66860472 2768 Mc4r 1.961696
## 85148 74762 17 29827958 29832399 4442 Mdga1 1.278733
## 85004 12575 17 29099344 29100722 1379 Cdkn1a 1.352018
## 84009 71840 17 25471590 25472255 666 Tekt4 3.968303
## 84837 14229 17 28406085 28406270 186 Fkbp5 1.924335
## 9807 100043257 1 150265535 150266066 532 Rbm3-ps -1.149630
## 19644 12696 10 80170982 80171653 672 Cirbp -1.148957
## logCPM LR PValue FDR
## 89251 2.6443083 546.2779 8.122753e-121 1.410435e-115
## 89248 2.2442001 455.3835 4.858892e-101 4.218490e-96
## 89252 1.6642483 336.8691 3.067190e-75 1.775289e-70
## 219969 3.3500243 279.8074 8.271227e-63 3.590540e-58
## 84835 5.1146822 243.1896 7.929691e-55 2.753823e-50
## 219967 6.1582271 233.7836 8.917448e-53 2.580709e-48
## 174514 3.2706614 200.9858 1.272652e-45 3.156905e-41
## 29246 2.2265890 186.1282 2.225909e-42 4.831335e-38
## 89250 -0.4171852 177.2309 1.950127e-40 3.762446e-36
## 95516 2.7624054 173.3757 1.354926e-39 2.352693e-35
## 126668 0.8899875 171.7133 3.125911e-39 4.934392e-35
## 84836 1.9322828 159.0777 1.799569e-36 2.603976e-32
## 186521 3.1588994 148.6580 3.406455e-34 4.549976e-30
## 96233 2.0361005 143.6157 4.311284e-33 5.347225e-29
## 85148 4.3521796 138.4908 5.691738e-32 6.588756e-28
## 85004 4.7712953 127.4303 1.495646e-29 1.623150e-25
## 84009 -1.0228279 126.1575 2.840277e-29 2.901092e-25
## 84837 1.5768174 123.6899 9.849171e-29 9.501167e-25
## 9807 4.5604543 118.5517 1.312861e-27 1.199817e-23
## 19644 4.7282365 116.4726 3.745089e-27 3.251487e-23
Alternative Splicing
sp1 <- diffSpliceDGE(fit, contrast=contr, geneid="GeneID", exonid="Start")
## Total number of exons: 173640
## Total number of genes: 15630
## Number of genes with 1 exon: 796
## Mean number of exons in a gene: 11
## Max number of exons in a gene: 314
topSpliceDGE(sp1, test="Simes", FDR=0.05)
## GeneID Chr Symbol NExons P.Value FDR
## 173085 100037396 7 D030047H15Rik 3 1.427152e-12 2.117038e-08
## 213678 19652 X Rbm3 8 2.001735e-10 1.484687e-06
## 122598 23856 2 Dido1 17 6.287719e-10 3.109067e-06
## 15321 20393 10 Sgk1 19 1.705489e-09 6.324807e-06
## 78179 545156 16 Kalrn 61 1.347739e-07 3.922097e-04
## 126979 319830 3 1500004A13Rik 2 1.586395e-07 3.922097e-04
## 219971 14605 X Tsc22d3 5 1.975956e-07 4.187334e-04
## 55935 108143 13 Taf9 7 2.634013e-07 4.884118e-04
## 187121 233908 7 Fus 15 8.310980e-07 1.369834e-03
## 128871 83679 3 Pde4dip 49 1.107124e-06 1.642308e-03
WriteDataFrameAsTsv(data.frame.to.save=topSpliceDGE(sp1, test="Simes", FDR=1,
number=Inf), file.name.path="KOSD5_KOHC5_diff_Splice_edger")
exon plot
plotSpliceDGE(sp1, geneid="Shank3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="D030047H15Rik", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Pde4dip", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Pclo", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Fabp7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Hdac7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Per3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Osbp2", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Tsc22d3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Rbm3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Kalrn", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Sgk1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Dido1", genecol="Symbol")

KOHC5 - WTHC5
Differential expression (QLtest)
contr <- limma::makeContrasts(contrasts="KOHC5 - WTHC5", levels=design)
qlf <- glmLRT(fit, contrast=contr)#glmQLFTest(fit, contrast=contr)
topTags(qlf, n=20, p.value=0.05)
## Coefficient: 1*KOHC5 -1*WTHC5
## GeneID Chr Start End Length Symbol logFC logCPM
## 72408 58234 15 89547626 89549882 2257 Shank3 -9.0760811 5.74935846
## 72409 58234 15 89557735 89560261 2527 Shank3 0.7163855 7.29565025
## 72404 58234 15 89533374 89533454 81 Shank3 1.1667844 2.73493738
## 72654 12805 15 92128128 92128335 208 Cntn1 -3.9086996 -2.18106438
## 72657 12805 15 92220447 92220479 33 Cntn1 1.8140440 0.08011166
## 72414 11434 15 89573832 89574585 754 Acr 1.0215133 1.12615618
## 72402 58234 15 89532095 89532219 125 Shank3 0.7047861 3.09506433
## 72393 58234 15 89503540 89503707 168 Shank3 0.8068976 2.50111953
## 72389 58234 15 89500215 89500418 204 Shank3 0.7701363 2.59030798
## 72421 68708 15 89588188 89588217 30 Rabl2 -1.5991729 -1.27759730
## 72406 58234 15 89543100 89543230 131 Shank3 0.7215936 2.40620387
## 72394 58234 15 89503797 89503913 117 Shank3 0.8909634 1.72778432
## 72400 58234 15 89531343 89531418 76 Shank3 0.9607680 1.51731287
## 72585 66725 15 91814662 91814733 72 Lrrk2 -1.1545411 0.06575861
## 72397 58234 15 89521103 89521373 271 Shank3 0.6465971 3.34820542
## 72403 58234 15 89532329 89532461 133 Shank3 0.6297949 2.28078566
## 72390 58234 15 89501404 89501475 72 Shank3 0.9249444 0.74489213
## 72157 69440 15 89186754 89186819 66 Dennd6b -0.6566132 1.33597099
## 72415 68708 15 89582527 89583430 904 Rabl2 -0.4410104 4.29867874
## 72392 58234 15 89503303 89503454 152 Shank3 0.6778519 1.95573827
## LR PValue FDR
## 72408 2660.68384 0.000000e+00 0.000000e+00
## 72409 71.09526 3.403868e-17 2.955238e-12
## 72404 69.31786 8.380738e-17 4.850771e-12
## 72654 57.58474 3.237270e-14 1.405299e-09
## 72657 45.90491 1.241347e-11 4.310949e-07
## 72414 40.41998 2.048356e-10 5.927943e-06
## 72402 37.01172 1.174216e-09 2.912726e-05
## 72393 33.36129 7.653228e-09 1.484396e-04
## 72389 33.35100 7.693828e-09 1.484396e-04
## 72421 30.24947 3.798950e-08 6.596496e-04
## 72406 29.83175 4.712128e-08 6.871501e-04
## 72394 29.81672 4.748791e-08 6.871501e-04
## 72400 27.68891 1.424775e-07 1.903061e-03
## 72585 24.76712 6.469153e-07 8.023598e-03
## 72397 22.20103 2.455414e-06 2.842387e-02
## 72403 21.84199 2.960504e-06 3.212887e-02
## 72390 21.70289 3.183118e-06 3.251275e-02
## 72157 21.30286 3.921463e-06 3.782904e-02
## 72415 21.12065 4.312550e-06 3.941217e-02
## 72392 20.83469 5.006785e-06 4.346891e-02
Alternative Splicing
sp1 <- diffSpliceDGE(fit, contrast=contr, geneid="GeneID", exonid="Start")
## Total number of exons: 173640
## Total number of genes: 15630
## Number of genes with 1 exon: 796
## Mean number of exons in a gene: 11
## Max number of exons in a gene: 314
topSpliceDGE(sp1, test="Simes", FDR=0.05)
## GeneID Chr Symbol NExons P.Value FDR
## 72409 58234 15 Shank3 22 0.000000e+00 0.000000e+00
## 70774 27367 15 Rpl3 10 6.015952e-18 4.462032e-14
## 72678 12805 15 Cntn1 26 4.771889e-14 2.359540e-10
WriteDataFrameAsTsv(data.frame.to.save=topSpliceDGE(sp1, test="Simes", FDR=1,
number=Inf), file.name.path="KOHC5_WTHC5_diff_Splice_edger")
exon plot
plotSpliceDGE(sp1, geneid="Shank3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Cntn1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Stmn1", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Kif21a", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Rpl3", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Fabp7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Hdac7", genecol="Symbol")

plotSpliceDGE(sp1, geneid="Per3", genecol="Symbol")
